Main analysis of chapter three

Author
Affiliation

Ryan Leadbetter

Centre for Transforming Maintenance through Data Science, Curtin University

Published

February 7, 2025

Show setup chunk
library(dplyr)
library(ggplot2)
library(ggdist)
library(cowplot)
library(rstan)
library(posterior)
library(tidybayes)
library(bayesplot)
library(kableExtra)
library(lubridate)
library(tidyverse)



rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

fig_path <- file.path(
  "..",
  "..",
  "figures",
  "ch-3"
)
tbl_path <- file.path(
  "..",
  "..",
  "tables",
  "ch-3"
)
Show runctions chunk
# Function to sample from a truncated normal distribution
rTruncNorm <- function(n, mean, sd, lb = NULL, ub = NULL) {
  # Check if lower bound is supplied & 
  # calculate lower bound of uniform dist
  if (!is.null(lb)) {
    lb_unif <- pnorm((lb - mean) / sd)
  } else {
    lb_unif <- 0
  }
  if (!is.null(ub)) {
    ub_unif <- pnorm((ub - mean) / sd)
  } else {
    ub_unif <- 1
  }
  # Sample from uniform distribution
  unif_rv <- runif(n, lb_unif, ub_unif)
  # Probability integral transformation
  norm_rv <- (qnorm(unif_rv) * sd) + mean
  
  return(norm_rv)
}
# function used to calculate weibull draws in joint prior
fn <- function(x) {
  log(-log(1 - x))
}

This markdown contains the code to reproduce the main analysis in Chap. 3. Here, I analyse the idler-frame failure time data from an overland iron ore conveyor. The first five rows of the data are shown in Table 1.

idler_data <- readRDS(
  file.path("..", "..", "data", "idler_frame_life_example.RDS")
)
# add missing lifetime
idler_data <- rbind(
  idler_data,
  data.frame(
    start = ymd("2020-11-15"),
    end = ymd("2021-01-11"),
    censored_at_start = FALSE,
    censored_at_end = TRUE,
    lifetime = ymd("2021-01-11") - ymd("2020-11-15"),
    frame_number = "78"
  )
)
head(idler_data) %>%
  kbl() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Table 1: The first five rows of the idler frame failures dataset.
start end censored_at_start censored_at_end lifetime frame_number
2014-12-10 2020-11-15 TRUE FALSE 2167 days 78
2014-12-10 2018-10-04 TRUE FALSE 1394 days 125
2019-04-05 2020-09-05 FALSE FALSE 519 days 125
2020-10-03 2021-01-11 FALSE TRUE 100 days 125
2014-12-10 2017-12-31 TRUE FALSE 1117 days 14
2018-04-28 2020-08-29 FALSE FALSE 854 days 14

The dataset contains the replacemens records of idlers-frames on an iron ore conveyor over the past six years. The plant (conveyor) has been in opperation for twenty years but replacements have only been reliably recorded at the frame level for the past six. Table 2 gives a description of the dataset. Figure 1 plots the lifetimes of each frame along the length of the conveyor. The frames number is on the horizontal axis and the value of the lifetimes is on the vertical.

summary_df <- data.frame(
  `Maximum lifetime` = max(idler_data$lifetime),
  `Minimum lifetime` = min(idler_data$lifetime),
  `Maximum fully observed lifetime` = idler_data %>%
    filter(!(censored_at_start | censored_at_end)) %>%
    pull(lifetime) %>%
    max(),
  `Beginning of observation` = min(idler_data$start),
  `End of observation` = max(idler_data$start),
  `Number of observations` = idler_data %>%
    nrow(),
  `Number of unique frames` = idler_data %>%
    pull(frame_number) %>%
    unique() %>%
    length(),
  `Number of left truncated observations` = idler_data %>%
    filter(censored_at_start) %>%
    nrow(),
  `Number of right censored observations` = idler_data %>%
    filter(censored_at_end) %>%
    nrow(),
  `Number of left truncated and right censored observations` = idler_data %>%
    filter((censored_at_end & censored_at_start)) %>%
    nrow(),
  check.names = FALSE
) %>%
t()

summary_df %>%
  kbl() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Table 2: Summary of the idler-frame dataset.
Maximum lifetime 2167 days
Minimum lifetime 1 days
Maximum fully observed lifetime 1461 days
Beginning of observation 2014-12-10
End of observation 2020-11-15
Number of observations 402
Number of unique frames 143
Number of left truncated observations 143
Number of right censored observations 144
Number of left truncated and right censored observations 1
Figure 1: The idler frame lifetimes. The frame number that the lifetime belongs to is on the horizontal axes and the log lifetime is plotted on the virtical axis. Fully observed lifetimes are shown in red while the lifetimes that are only partialy observed are shown in blue. Those lifetimes that are also left-truncated are indicated by tryangles.

In Figure 1, the red points are fully observed lifetimes (i.e. where we observed the instalation and failure of the idlers in the frame), whereas blue points are partialy observed lifetimes (where either the instalation, failure time, or both are unknown). The triangular blue points are points where the isntalation of the frame is unknown. As discussed in the main chapter, a conventional way of dealing with these left-truncated lifetimes with unknown exposure history is to simply discard them. If we were to discard the left-truncated samples in this dataset then we would be throwing away 35.7% of the data.

There are a few fully observed lifetimes that are very short (less than three weeks);

idler_data %>%
  filter(
    !(censored_at_start | censored_at_end),
    lifetime < (7 * 3)
  )
        start        end censored_at_start censored_at_end lifetime
1  2017-06-22 2017-06-25             FALSE           FALSE   3 days
2  2018-05-28 2018-06-08             FALSE           FALSE  11 days
3  2020-04-11 2020-04-26             FALSE           FALSE  15 days
4  2020-04-11 2020-04-12             FALSE           FALSE   1 days
5  2016-06-02 2016-06-07             FALSE           FALSE   5 days
6  2018-09-09 2018-09-13             FALSE           FALSE   4 days
7  2018-09-09 2018-09-13             FALSE           FALSE   4 days
8  2016-08-30 2016-09-02             FALSE           FALSE   3 days
9  2016-06-02 2016-06-07             FALSE           FALSE   5 days
10 2018-05-28 2018-06-08             FALSE           FALSE  11 days
11 2018-04-28 2018-05-09             FALSE           FALSE  11 days
12 2017-06-22 2017-06-25             FALSE           FALSE   3 days
13 2017-07-20 2017-07-29             FALSE           FALSE   9 days
14 2017-06-22 2017-06-25             FALSE           FALSE   3 days
15 2017-09-05 2017-09-11             FALSE           FALSE   6 days
16 2017-06-22 2017-06-25             FALSE           FALSE   3 days
17 2017-06-22 2017-06-25             FALSE           FALSE   3 days
18 2016-06-02 2016-06-07             FALSE           FALSE   5 days
19 2017-06-22 2017-06-25             FALSE           FALSE   3 days
20 2017-06-22 2017-06-25             FALSE           FALSE   3 days
21 2017-06-22 2017-06-25             FALSE           FALSE   3 days
22 2017-06-22 2017-06-25             FALSE           FALSE   3 days
23 2016-11-07 2016-11-18             FALSE           FALSE  11 days
24 2016-11-07 2016-11-18             FALSE           FALSE  11 days
25 2016-11-07 2016-11-17             FALSE           FALSE  10 days
   frame_number
1           101
2            44
3            61
4            49
5             9
6             3
7           135
8             1
9            11
10           84
11          119
12          121
13            4
14           63
15           63
16           51
17           88
18           12
19           86
20           83
21          131
22           96
23           33
24           39
25           27

These 25 failures may be a result of manufactureing defects or incorrect installation, which is not the failure mode that I want to analyse here. Therefore, following the approach of Hong, Meeker, and McCalley (2009), I treat these observations as a case of right censoring (i.e. their failure due to wear was right censored by their failure from another cause).

1 Constructing an informative prior

We know that the average lifetime is around five years, and that almost all idlers should have failed by eight (0.99). I encode this information into a joint prior–using the method I describe in Chapter 2–by specifying the elicitation times at five and eight years and the expectation(standard deviation) of the CDF at these times as 0.5(sd 0.15) and 0.99(sd 0.05) respectively. The resulting informative prior is shown in Figure 2.

(a) The joint draws of the shape and scale.
(b) The resulting uncertainty in the CDF.
Figure 2: The informative joint prior that results from encoding x, y, and z.

2 Fitting the model in Stan

In this section I fit the Stan model for imputing partialy observed lifetimes and missing truncation times from Chapter 2 to the idler-frame data. I define the model and then prepare the data for stan. Since the conveyor has been in operation for 21 years I set t_start to 15. Given that the t_start is fifteen years.

stan_model_unknown_lt_rc_inf <- stan_model(
  model_code = "
functions {
// function to simplify the calculation of eta and beta
real fn(real tCDF) {
  return log(-log1m(tCDF));
}
}
data {
int N_obs;                             # N fully observed lives
int N_rc;                              # N right censored only lives
int N_lt;                              # N left truncated only lives
int N_lt_rc;                           # N right cens and left trunc lives
array[N_obs] real<lower=0> y_obs;      # Fully observed lifetimes
array[N_rc] real<lower=0> y_rc;        # Right censored lifetimes
array[N_lt] real<lower=0> y_lt;        # Left trunc lifetimes
array[N_lt_rc] real<lower=0> y_lt_rc;  # right cens and left trunc lifetimes
real<lower=0> t_start;                 # start of the observation window
// Define the prior
real t_1;
real t_2;
real t1_mean;
real t1_var;
real t2_mean;
real t2_var;
}
transformed data{
array[N_lt] real<lower=0> y_lt_upper;  # The upper bound of the left trunc lives

for (m in 1:N_lt){
  y_lt_upper[m] = y_lt[m] + t_start;   # Upper bound = lower bound + start of observation
}

}
parameters {
real<lower = 0, upper = 1> t1CDF;
real<lower = t1CDF, upper = 1> t2CDF;
array[N_rc] real<lower=y_rc> y_rc_hat;   # imputed right censored values
array[N_lt] real<lower=y_lt, upper=y_lt_upper> y_lt_hat;  # imputed left trunc values
array[N_lt_rc] real<lower=y_lt_rc> y_lt_rc_hat;   # imputed left trunc and right cens values
array[N_lt_rc] real<lower=0, upper=1> t_lt_rc_st; # imputed left truncation times for left trunc and right cens values (standardised)
}
transformed parameters{
real<lower = 0> beta;
real<lower = 0> eta;
array[N_lt] real t_lt;  # imputed left trunc times for left trunc values
array[N_lt_rc] real<lower=0, upper=t_start> t_lt_rc_upper;
array[N_lt_rc] real<lower=0, upper=t_lt_rc_upper> t_lt_rc;  # imputed left trunc times for left trunc and right cens values

// calculate Weibull paramaters based on the
// draws from the CDF at t1 and t2.
beta = (fn(t2CDF) - fn(t1CDF)) / log(t_2 / t_1);
eta = exp(log(t_1) - (fn(t1CDF) / beta));

for (i in 1:N_lt) {
  t_lt[i] = y_lt_hat[i] - y_lt[i];
}

for (k in 1:N_lt_rc){
  if ((y_lt_rc_hat[k] - y_lt_rc[k]) < t_start)
    t_lt_rc_upper[k] = y_lt_rc_hat[k] - y_lt_rc[k];
  else
    t_lt_rc_upper[k] = t_start;

  t_lt_rc[k] = t_lt_rc_st[k] * t_lt_rc_upper[k];
}
}
model{
// Data model
// fully observed lifetimes
y_obs ~ weibull(beta, eta);
// right censored lifetimes
y_rc_hat ~ weibull(beta, eta);
// left truncated lifetimes
for (i in 1:N_lt) {
  y_lt_hat[i] ~ weibull(beta, eta) T[t_lt[i], ]; 
}
// left truncated and right censored lifetimes
for (j in 1:N_lt_rc) {
  y_lt_rc_hat[j] ~ weibull(beta, eta) T[t_lt_rc[j], ]; 
}

// Prior model
t1CDF ~ normal(t1_mean, t1_var);
t2CDF ~ normal(t2_mean, t2_var);
t_lt_rc_st ~ uniform(0, 1);
}

"
)

Before sampling, I prepare the data to pass to Stan.

df_obs <- idler_data %>%
  filter(lifetime > (3 * 7)) %>%
  filter(!(censored_at_start | censored_at_end))
df_rc <- idler_data %>%
  filter((!censored_at_start & censored_at_end) | 
    ((lifetime < (3 * 7)) & !(censored_at_start | censored_at_end)))
df_lt <- idler_data %>%
  filter((censored_at_start & !censored_at_end))
df_lt_rc <- idler_data %>%
  filter((censored_at_start & censored_at_end))
idler_frame_stan_data <- list(
  N_obs = nrow(df_obs),
  N_rc = nrow(df_rc),
  N_lt = nrow(df_lt),
  N_lt_rc = nrow(df_lt_rc),
  y_obs = array(df_obs$lifetime),
  y_rc = array(df_rc$lifetime),
  y_lt = array(df_lt$lifetime),
  y_lt_rc = array(df_lt_rc$lifetime),
  t_start = 15 * 365
)

To sample draws from the posterior I use 4 chains each 2000 draws long and with a burn in of 500 iterations and no thinning.

idler_stan_fit <- sampling(
  stan_model_unknown_lt_rc_inf,
  c(
    idler_frame_stan_data,
    t_1 = 5 * 365,
    t_2 = (5 + 3) * 365,
    t1_mean = 0.5,
    t1_var = 0.15,
    t2_mean = 0.99,
    t2_var = 0.05
  ),
  chains = 4,
  cores = 4,
  iter = 2000,
  warmup = 500,
  seed = 246
)

Table 3 summarises the draws of the parameters from the posterior. The chain diagnostics appear reasonable since there were no divergencies flagged during sampling and in Table 3 the \(\hat{R}\) values are close to \(\approx 1\) and \(n_{\text{eff}}\) is large for both parameters.

Table 3: Summary of sampling and joint posterior of beta and eta.
Parameter mean se_mean sd 2.5% 50% 97.5% n_eff Rhat
beta 1.098247 0.0006098 0.0496948 1.001051 1.097238 1.199277 6640.704 0.9996409
eta 1363.879373 1.2425973 87.6481599 1197.274410 1364.262402 1538.884509 4975.363 0.9995964

3 The posterior distribution

In Figure 3, I plot the posterior draws of the parameters. The left plot shows the posterior draws on their own while the right compares them with the contours of the prior. The posterior sits in the tails of the prior, indicating that there is a slight prior-data conflict.

Show the code
PlotPostCDF <- function(stan_fit, x_range = c(0, 5), res = 0.05) {
  grid <- seq(x_range[1], x_range[2], res)
  p_cdf <- stan_fit %>%
    as_draws_df() %>%
    select(beta, eta) %>%
    split(., seq(nrow(.))) %>%
    lapply(
      function(draw) {
        df_CDF <- data.frame(
          q = grid,
          p = pweibull(grid, draw$beta, draw$eta)
        )
        return(df_CDF)
      }
    ) %>%
    bind_rows() %>%
    ggplot() +
    stat_lineribbon(
      aes(x = q, y = p),
      .width = c(0.5, 0.9)
    ) +
    scale_fill_brewer() +
    xlab("t") +
    ylab("cdf") +
    theme_minimal() +
    theme(legend.position = "none")
    return(p_cdf)
}
(a) Plotted with the same axis scales as Figure 2.
(b) Zoomed in.
Figure 3: The joint draws of the shape and scale from the posterior.

Figure 4 shows the coresponding posterior uncertainty in the CDF of the Weibull lifetime distribution. The uncertainty surounding the CDF is now much more precise than the prior in Figure 2.

Figure 4: The posterior uncertainty about the CDF.

4 Expected failure times

The posterior samples also provide draws of the imputed values of the censored lifetimes. Using these draws, I can obtain predictive draws for the remaining useful life (RUL) of the frames still in operation acording to \(RUL_i = \tilde{y}_i - C_i\) where \(C_i\) is the censoring time of the lifetime (or the current age of the frame). Figure 5 shows the predictive RUL distributions for each frame.

(a) Frames 1-72.
(b) Frames 73-143.
Figure 5: The RUL predictive distributions for the idler-framce still curently in opperation.

5 Expected number of failures

Using the predictive draws of the RUL for each of the frames, I also construct a distribution for the cumulative failures going forward from the end of the observation period. I do this by ordering the predictive values of the frames for each draw and then creating a step function for the cumulative number of failures. Ten of these draws are shown in Figure 6.

Figure 6: Ten example draws of the cumulative failures in days following the end of observation.

From the distribution of step functions, I can calculate predictive distributions for the number of failures within the next 1, 3, and 6 months. Figure 7 shows these predictive distributions.

Figure 7: The expected number of failures in the next one (a), three (b), and six (c) months after the end of the observation period.

6 Cost functions

The posterior draws can also be used to propogate the uncertainty in the analysis through utility functions, such as cost functions, that inform long term management decisions. Here I show an example of choosing a suitable fixed time interval to preventatively replace the idlers to avoid large unplanned maintenance costs and downtime. The details of the cost function are provided in Sec. 3.4.3 of the main thesis chapter and an explanation of the code is provided in Alg. 1 in the section. Here I calculate the predictive distributions for fixed time replacement intervals every 1-44 shutdowns of the mine, these distributions are shown in Figure 8.

rvar_pweibull <- rfun(pweibull)

CumulativeBeltFailures <- function(
  beta, eta,        # Weibull parameters
  N_units = 143,    # number of frames
  N_shuts = 44,     # the number of shutdowns to run sim for
  delta = (6 * 7),  # operation time between shuts
  N_runs = 1000,    # number of simulation runs
  P_rm = 0.05       # Probability of frame failure needing reactive maintenance
  ) {
  unit_ages_start <- rep(0, N_units * N_runs)
  K_uf <- array(rep(NA, N_shuts * N_runs), dim = c(N_shuts, N_runs))
  K_rm <- array(rep(NA, N_shuts * N_runs), dim = c(N_shuts, N_runs))
  for (shut in 1:N_shuts) {
    # Calculate age at end of production period
    unit_ages_end <- unit_ages_start + delta
    # Calculate probability of unit failing given its age
    P_uf <- (pweibull(unit_ages_end, beta, eta) -  pweibull(unit_ages_start, beta, eta)) /
      (1 - pweibull(unit_ages_start, beta, eta))
    # Simulate unit failurs
    failures <-  rbinom(
      n = N_units * N_runs,
      size = 1,
      prob = P_uf 
    ) %>%
    matrix(
      nrow = N_units,
      ncol = N_runs
    )
    # Put in matrix
    # apply sum
    K_uf[shut, ] <- failures %>%
      apply(FUN = sum, MARGIN = 2)
    # Simulate reactive maintenance
    K_rm[shut, ] <- rbinom(
      n = N_runs,
      size = K_uf[shut, ],
      prob = P_rm
    )
    # Update unit ages
    unit_ages_start <- unit_ages_end
    if(any(failures == 1)) unit_ages_start[as.logical(failures)] <- 0
  }
  K_rm %>%
    apply(FUN = cumsum, MARGIN = 2) %>%   # Take cumulative sum
    apply(FUN = mean, MARGIN = 1) %>%     # Calculate expected values
    return()
}
# extract draws for parameters from posterior
joint_draws <- idler_stan_fit %>%
  as_draws_df() %>%
  spread_draws(beta, eta)
# thin draws
subset_of_draws <- seq(1, nrow(joint_draws), 5)
# apply sim over draws of parameters
E_N_reactive_maintenance <- mapply(
  function(beta, eta) CumulativeBeltFailures(beta = beta, eta = eta),
  joint_draws$beta[subset_of_draws],
  joint_draws$eta[subset_of_draws]
)
Figure 8: The predictive distribution of cost per unit time for the diferent choices of fixed time replacement interval.
Table 4

References

Hong, Yili, William Q. Meeker, and James D. McCalley. 2009. Prediction of remaining life of power transformers based on left truncated and right censored lifetime data.” The Annals of Applied Statistics 3 (2): 857–79. https://doi.org/10.1214/00-AOAS231.